

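/* TODO: add a command here to import the "survey_and_created_variables" file from the NLSY79 Geocode Data.
   The code below requires at least CASEID_1979, FAM_5A_1979 (county code), and FAM_5B_1979 (state code). */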

* Flag location records that cannot be used, then rename the raw geocode
* variables to the merge keys (statefip, countyfip).
* Negative codes are treated as "not available". Stata missing values
* (., .a-.z) compare as larger than any number, so they are handled explicitly:
* they count as not available and must not be flagged as "out of 50 states".
gen loc_na = ( FAM_5A_1979<0 | FAM_5B_1979<0 | missing(FAM_5A_1979, FAM_5B_1979) ) // not available
gen loc_out = ( FAM_5B_1979>56 & !missing(FAM_5B_1979) ) // out of 50 states + DC
rename FAM_5B_1979 statefip
rename FAM_5A_1979 countyfip

* Recode county values that are not regular county FIPS codes (200, 300, ...)
* to the county named in each comment, so that they can match in the merge.
* NOTE(review): presumably these are special place codes in the geocode file -- confirm against the codebook.
replace countyfip=129 if statefip== 1 & countyfip==200 // Washington county, AL
replace countyfip= 95 if statefip==29 & countyfip==700 // Jackson county, MO (Kansas City)
* Wyoming county, NY is FIPS 36121; the previous value 123 is Yates county, NY.
replace countyfip=121 if statefip==36 & countyfip==300 // Wyoming county, NY (Attica)
replace countyfip= 61 if statefip==36 & countyfip==500 // New York, NY (NYC)
replace countyfip=103 if statefip==36 & countyfip==600 // Suffolk county, NY (Long Island)
replace countyfip=143 if statefip==40 & countyfip==400 // Tulsa county, OK (Tulsa)

* Attach county-level information by state/county code. Only master
* observations are retained (matched or not); using-only rows are discarded.
merge m:1 statefip countyfip using ../import/geo_info.dta, keep(master match) force

* Show codes that look valid (not missing, not out of range) but found no match.
list statefip countyfip if _merge==1 & !loc_na & !loc_out

* Flag every master observation that found no match in geo_info.dta.
generate loc_um = _merge==1

label variable loc_na  "missing location at age 14"
label variable loc_out "not in 50 states or DC at age 14"
label variable loc_um  "invalid location record at age 14"

* Diagnostics: each flag is tabulated among records passing the previous flags.
tabulate loc_na, missing
tabulate loc_out if !loc_na, missing
tabulate loc_um if !loc_na & !loc_out, missing

* State-county identifier used for clustering.
* NOTE(review): observations with negative (not-available) codes also receive a
* group id here -- confirm that downstream code filters on loc_na/loc_out/loc_um.
egen cty_grp = group(statefip countyfip)

* Build the 1974-1981 lists of unemployment-rate and earnings variables.
local urate_vars
local earn_vars
forvalues yr = 1974/1981 {
    local urate_vars `urate_vars' urate`yr'
    local earn_vars `earn_vars' earn_pe`yr'
}

* Keep the respondent id, location flags, and county characteristics only.
sort CASEID_1979
keep CASEID_1979 cty_grp loc_na loc_out loc_um ///
    regiona divisiona pub2 pub4 dist2 dist4 tui2_c tui4_c ///
    uafrac uapop `urate_vars' `earn_vars'

save ../processed/college79.dta, replace

